# Clean up: start from an empty workspace.
rm(list = ls())

# Load packages.
library(readxl) # replaces XLConnect
library(foreign)
library(car)

# Front matter: every relative file name read or written below resolves
# against this directory.
setwd("/Volumes/MONOGAN/psrsd/anxiety2016/data/anes_timeseries_2016_dta/")

### OPEN ENDED RESPONSE MANAGEMENT ###
# Read the first worksheet (header row supplies the column names) and convert
# the result to a plain data.frame.
text <- as.data.frame(
  read_excel(path = "anes2016presOnly.xlsx", sheet = 1, col_names = TRUE)
)
# names(text)
# head(text)
# tail(text)
# text=subset(text.0,select=c(caseid,candlik_likewhatdpc))

## Candidate likes and dislikes (open-ended responses) ##
# The four open-ended items (Democratic likes/dislikes, Republican
# likes/dislikes) are cleaned and tallied with exactly the same steps, so the
# steps live in one helper. Routing every column through the helper guarantees
# that each column's own literal "NA" answers are recoded to missing before
# the comments are counted.

# Clean one open-ended response column and count the comments in each answer.
#   responses: vector of raw answers, one element per respondent.
# Returns a list with three elements, each as long as `responses`:
#   cleaned: normalized, comma-delimited text (NA where there is no answer),
#   pieces:  list obtained by splitting `cleaned` on commas,
#   count:   numeric number of comma-delimited comments (0 where `cleaned` is NA).
count_comments <- function(responses) {
  cleaned <- responses

  # recode empty observations: these labels are missing-data codes, not answers
  cleaned[cleaned %in% c("-1 Inapplicable", "-7 Refused")] <- NA

  # eliminate final statement of "no" further comment and other common problems
  # NOTE(review): this removes the letters "no" wherever they occur, including
  # inside longer words. It cannot merge or split comments, but confirm that is
  # acceptable if the cleaned text is ever used for more than counting.
  cleaned <- gsub(pattern = "no", replacement = "", x = cleaned, ignore.case = TRUE)

  # cut spaces, and turn various delimiters into comma delimiters
  # (order matters: ". " must be converted before the spaces are removed)
  cleaned <- gsub(pattern = "[.] ", replacement = ",", x = cleaned) # periods are used as delimiters in 2016
  cleaned <- gsub(pattern = " ", replacement = "", x = cleaned)
  cleaned <- gsub(pattern = "//", replacement = ",", x = cleaned)
  cleaned <- gsub(pattern = "[\\]", replacement = ",", x = cleaned)
  cleaned <- gsub(pattern = "-/-", replacement = ",", x = cleaned)
  cleaned <- gsub(pattern = ";", replacement = ",", x = cleaned)

  # check for empty strings and the literal "NA" coding (new in 2016), turn to missing
  cleaned[cleaned %in% c("", "NA")] <- NA

  # split the string; a missing answer splits to a single NA, which must count
  # as zero comments rather than one
  pieces <- strsplit(cleaned, split = ",")
  count <- as.numeric(lengths(pieces))
  count[is.na(cleaned)] <- 0

  list(cleaned = cleaned, pieces = pieces, count = count)
}

# names: count column to create; values: open-ended text column it is built from
open.ended <- c(
  dem.like = "candlik_likewhatdpc",
  dem.dislike = "candlik_dislwhatdpc",
  rep.like = "candlik_likewhatrpc",
  rep.dislike = "candlik_dislwhatrpc"
)
stopifnot(all(open.ended %in% names(text)))

comment.lists <- list()
for (count.name in names(open.ended)) {
  text.name <- open.ended[[count.name]]

  # examine the most frequent raw answers before cleaning
  print(head(rev(sort(table(text[[text.name]], useNA = "always"))), 20))

  tally <- count_comments(text[[text.name]])
  text[[text.name]] <- tally$cleaned # the cleaned text replaces the raw column
  text[[count.name]] <- tally$count  # record the number of comments in the data
  comment.lists[[text.name]] <- tally$pieces

  # eyeball cleaned text next to its count
  print(head(text[, c(text.name, count.name)], 15))
  print(tail(text[, c(text.name, count.name)], 15))
}

# keep the split comments available under their original object names
candlik_likewhatdpc.list <- comment.lists[["candlik_likewhatdpc"]]
candlik_dislwhatdpc.list <- comment.lists[["candlik_dislwhatdpc"]]
candlik_likewhatrpc.list <- comment.lists[["candlik_likewhatrpc"]]
candlik_dislwhatrpc.list <- comment.lists[["candlik_dislwhatrpc"]]

## Create the end count file ##
# Keep only the respondent identifier (V160001 is the "caseid") and the four
# comment counts.
text.count <- text[, c("V160001", "dem.like", "dem.dislike", "rep.like", "rep.dislike")]
# write.csv(text.count,"textCount2016.csv",row.names=F)


### QUANTITATIVE RESPONSE MANAGEMENT ###
# Load data (factor labels are not converted, so responses stay numeric codes).
anes.0 <- read.dta("anes_timeseries_2016_Stata12.dta", convert.factors = FALSE)

# Subset and rename in one place: each entry reads source column = working name.
var.map <- c(
  version = "version", V160001 = "caseid", V162034a = "postvote_presvtwho",
  V161116 = "dAngry", V161117 = "dHope", V161118 = "dAfraid",
  V161119 = "dProud", V161120 = "dDisgust",
  V161121 = "rAngry", V161122 = "rHope", V161123 = "rAfraid",
  V161124 = "rProud", V161125 = "rDisgust",
  V161158x = "pid_x",
  V161178 = "spsrvpr_ssself", V161179 = "spsrvpr_ssdpc", V161180 = "spsrvpr_ssrpc",
  V161181 = "defsppr_self", V161182 = "defsppr_dpc", V161183 = "defsppr_rpc",
  V161184 = "inspre_self", V161185 = "inspre_dpc", V161186 = "inspre_rpc",
  V161189 = "guarpr_self", V161190 = "guarpr_dpc", V161191 = "guarpr_rpc",
  V161198 = "aidblack_self", V161199 = "aidblack_dpc", V161200 = "aidblack_rpc",
  V161201 = "envjob_self", V161202 = "envjob_dpc", V161203 = "envjob_rpc",
  V161068 = "candlik_likedpc", V161071 = "candlik_disldpc",
  V161074 = "candlik_likerpc", V161077 = "candlik_dislrpc",
  V161126 = "ideol", V161086 = "demTherm", V161087 = "repTherm",
  V162031x = "voted"
)
anes.1 <- anes.0[, names(var.map)]
names(anes.1) <- unname(var.map)

# The first column of the count file is the respondent identifier; give it the
# same name as in anes.1 so the two tables can be joined on it.
names(text.count)[1] <- "caseid"

# Merge: keeps respondents that appear in both tables.
anes <- merge(x = anes.1, y = text.count, by = "caseid")

# who voted? whom did they vote for?
# also, eliminate true independents (pid_x==4)
# also, code third-party voters as "nonvoters"

# Negative turnout codes are missing data. The column is named in full:
# `anes$vote` only resolves to `voted` through `$` partial matching, which
# stops working as soon as a second column starting with "vote" exists
# (vote.rep is created below).
anes$voted[anes$voted < 0] <- NA
anes <- subset(anes, subset = !is.na(voted) & pid_x != 4)

# drop respondents whose presidential vote choice carries one of these
# negative (missing-data) codes
anes <- subset(anes, subset = !anes$postvote_presvtwho %in% c(-9, -8, -7, -6))

# Vote-choice codes that are treated as not voting for a major-party candidate.
# The same set is used for both recodes below so that every case marked as a
# non-voter also receives the placeholder vote choice.
# NOTE(review): code 7 was previously flagged as a non-voter but left out of
# the placeholder recode, leaving vote.rep = 6 for those cases; confirm against
# the codebook that 7 belongs with the other codes.
non.major <- c(-1, 3, 4, 5, 7, 9)
anes$voted[anes$postvote_presvtwho %in% non.major] <- 0
anes$postvote_presvtwho[anes$postvote_presvtwho %in% non.major] <- 10

# codes 1 and 2 become 0 and 1; the placeholder 10 becomes 9, which is set to
# missing when the output files are written
anes$vote.rep <- anes$postvote_presvtwho - 1

# data cleaning, any negative number is out, as these are all missing data codes
# NOTE(review): both recodes apply to every column of the data frame, so a
# value of exactly 99 in caseid, the thermometers or the comment counts would
# also become NA; confirm 99 is never a valid value there.
anes[anes < 0] <- NA
anes[anes == 99] <- NA
#summary(anes)
#table(anes<0)

# Rescale partisanship to the 0-1 interval (min-max scaling).
anes$pid_x <- (anes$pid_x - min(anes$pid_x, na.rm = TRUE)) /
  diff(range(anes$pid_x, na.rm = TRUE))

# Issue advantage measure: for each of six issues, the respondent's absolute
# distance from the *dpc placement minus the absolute distance from the *rpc
# placement, summed over issues and then min-max scaled to 0-1.
anes$issues <- with(
  anes,
  abs(spsrvpr_ssdpc - spsrvpr_ssself) - abs(spsrvpr_ssrpc - spsrvpr_ssself) +
    abs(defsppr_dpc - defsppr_self) - abs(defsppr_rpc - defsppr_self) +
    abs(inspre_dpc - inspre_self) - abs(inspre_rpc - inspre_self) +
    abs(guarpr_dpc - guarpr_self) - abs(guarpr_rpc - guarpr_self) +
    abs(aidblack_dpc - aidblack_self) - abs(aidblack_rpc - aidblack_self) +
    abs(envjob_dpc - envjob_self) - abs(envjob_rpc - envjob_self)
)
anes$issues <- (anes$issues - min(anes$issues, na.rm = TRUE)) /
  diff(range(anes$issues, na.rm = TRUE))

# Cronbach's alpha on issue measures: one column per issue holding that issue's
# contribution to the issue advantage score.
library(psych)
issue.reliability <- anes["caseid"]
issue.reliability$spsrvpr <- with(anes, abs(spsrvpr_ssdpc - spsrvpr_ssself) - abs(spsrvpr_ssrpc - spsrvpr_ssself))
issue.reliability$defsppr <- with(anes, abs(defsppr_dpc - defsppr_self) - abs(defsppr_rpc - defsppr_self))
issue.reliability$inspre <- with(anes, abs(inspre_dpc - inspre_self) - abs(inspre_rpc - inspre_self))
issue.reliability$guarpr <- with(anes, abs(guarpr_dpc - guarpr_self) - abs(guarpr_rpc - guarpr_self))
issue.reliability$aidblack <- with(anes, abs(aidblack_dpc - aidblack_self) - abs(aidblack_rpc - aidblack_self))
issue.reliability$envjob <- with(anes, abs(envjob_dpc - envjob_self) - abs(envjob_rpc - envjob_self))
head(issue.reliability)
# the identifier is not an item, so drop it before computing alpha
issue.reliability$caseid <- NULL
psych::alpha(issue.reliability)

# Alternate issue advantage measure for longer-term analysis: same construction
# as the main measure but using only three issues (spsrvpr, defsppr, guarpr).
anes$issues.3 <- with(
  anes,
  abs(spsrvpr_ssdpc - spsrvpr_ssself) - abs(spsrvpr_ssrpc - spsrvpr_ssself) +
    abs(defsppr_dpc - defsppr_self) - abs(defsppr_rpc - defsppr_self) +
    abs(guarpr_dpc - guarpr_self) - abs(guarpr_rpc - guarpr_self)
)
anes$issues.3 <- (anes$issues.3 - min(anes$issues.3, na.rm = TRUE)) /
  diff(range(anes$issues.3, na.rm = TRUE))
# keep the measure in its own table, then remove it from the working data
time.issue <- anes[, c("caseid", "issues.3")]
#write.csv(time.issue,"timeIssue16.csv",row.names=F)
anes$issues.3 <- NULL

# Squared Euclidean distance on issues: same construction as the absolute
# distance measure, with squared differences in place of absolute differences.

# Return the element-wise square of x.
squareit <- function(x) {
  x^2
}
anes$issues.sq <- with(
  anes,
  squareit(spsrvpr_ssdpc - spsrvpr_ssself) - squareit(spsrvpr_ssrpc - spsrvpr_ssself) +
    squareit(defsppr_dpc - defsppr_self) - squareit(defsppr_rpc - defsppr_self) +
    squareit(inspre_dpc - inspre_self) - squareit(inspre_rpc - inspre_self) +
    squareit(guarpr_dpc - guarpr_self) - squareit(guarpr_rpc - guarpr_self) +
    squareit(aidblack_dpc - aidblack_self) - squareit(aidblack_rpc - aidblack_self) +
    squareit(envjob_dpc - envjob_self) - squareit(envjob_rpc - envjob_self)
)
anes$issues.sq <- (anes$issues.sq - min(anes$issues.sq, na.rm = TRUE)) /
  diff(range(anes$issues.sq, na.rm = TRUE))

# Directional measure on issues. Every placement is centered by subtracting 4;
# for each issue the product of the centered *dpc placement and the centered
# self placement is subtracted, and the product of the centered *rpc placement
# and the centered self placement is added. The sum over the six issues is
# then min-max scaled to 0-1.
# Computed on whole columns: this gives the same values as a row-by-row loop,
# avoids repeatedly modifying the data frame one element at a time, and is
# also correct when the data frame has zero rows.
anes$issues.direction <- with(
  anes,
  -(spsrvpr_ssdpc - 4) * (spsrvpr_ssself - 4) + (spsrvpr_ssrpc - 4) * (spsrvpr_ssself - 4) -
    (defsppr_dpc - 4) * (defsppr_self - 4) + (defsppr_rpc - 4) * (defsppr_self - 4) -
    (inspre_dpc - 4) * (inspre_self - 4) + (inspre_rpc - 4) * (inspre_self - 4) -
    (guarpr_dpc - 4) * (guarpr_self - 4) + (guarpr_rpc - 4) * (guarpr_self - 4) -
    (aidblack_dpc - 4) * (aidblack_self - 4) + (aidblack_rpc - 4) * (aidblack_self - 4) -
    (envjob_dpc - 4) * (envjob_self - 4) + (envjob_rpc - 4) * (envjob_self - 4)
)
anes$issues.direction <- (anes$issues.direction - min(anes$issues.direction, na.rm = TRUE)) /
  (max(anes$issues.direction, na.rm = TRUE) - min(anes$issues.direction, na.rm = TRUE))

# Using averages for candidate placement: same absolute-distance construction
# as the main issue measure, but each *dpc / *rpc placement is replaced by its
# sample mean (missing values ignored) instead of the respondent's own rating.
anes$issues.mean <- with(
  anes,
  abs(mean(spsrvpr_ssdpc, na.rm = TRUE) - spsrvpr_ssself) - abs(mean(spsrvpr_ssrpc, na.rm = TRUE) - spsrvpr_ssself) +
    abs(mean(defsppr_dpc, na.rm = TRUE) - defsppr_self) - abs(mean(defsppr_rpc, na.rm = TRUE) - defsppr_self) +
    abs(mean(inspre_dpc, na.rm = TRUE) - inspre_self) - abs(mean(inspre_rpc, na.rm = TRUE) - inspre_self) +
    abs(mean(guarpr_dpc, na.rm = TRUE) - guarpr_self) - abs(mean(guarpr_rpc, na.rm = TRUE) - guarpr_self) +
    abs(mean(aidblack_dpc, na.rm = TRUE) - aidblack_self) - abs(mean(aidblack_rpc, na.rm = TRUE) - aidblack_self) +
    abs(mean(envjob_dpc, na.rm = TRUE) - envjob_self) - abs(mean(envjob_rpc, na.rm = TRUE) - envjob_self)
)
anes$issues.mean <- (anes$issues.mean - min(anes$issues.mean, na.rm = TRUE)) /
  diff(range(anes$issues.mean, na.rm = TRUE))
#cor(anes$issues,anes$issues.mean,use="complete.obs")

# Ideology, min-max scaled to 0-1.
anes$ideol <- (anes$ideol - min(anes$ideol, na.rm = TRUE)) /
  diff(range(anes$ideol, na.rm = TRUE))

# Candidate personal quality measure: comment counts that enter positively
# (rep.like, dem.dislike) minus those that enter negatively (rep.dislike,
# dem.like).
anes$personal <- with(anes, rep.like + dem.dislike - rep.dislike - dem.like)

# Scale emotion coding to run on a 0 to 1 scale: subtract 1 and divide by 4,
# applied to each of the ten emotion items in turn.
emotion.vars <- c(
  "dAngry", "dHope", "dAfraid", "dProud", "dDisgust",
  "rAngry", "rHope", "rAfraid", "rProud", "rDisgust"
)
anes[emotion.vars] <- lapply(anes[emotion.vars], function(item) (item - 1) / 4)

# Candidate own emotions: respondents with rescaled pid_x below .5 are flagged
# democrat = 1; for them each *.own variable takes the d-prefixed emotion item,
# for everyone else the r-prefixed item (missing pid_x gives missing values).
anes$democrat <- as.numeric(anes$pid_x < .5)
own.emotions <- c(
  ang.own = "Angry", hp.own = "Hope", afr.own = "Afraid",
  prd.own = "Proud", disg.own = "Disgust"
)
for (own.name in names(own.emotions)) {
  emotion <- own.emotions[[own.name]]
  anes[[own.name]] <- ifelse(
    anes$democrat == 1,
    anes[[paste0("d", emotion)]],
    anes[[paste0("r", emotion)]]
  )
}

### Write Data ###
# Variables shared by all three output files.
core.vars <- c(
  "caseid", "vote.rep", "pid_x", "issues", "issues.sq", "issues.direction",
  "issues.mean", "personal", "ang.own", "hp.own", "afr.own", "prd.own",
  "disg.own", "ideol"
)
# Rows with voted equal to 1 (missing turnout counts as not selected).
voted.rows <- !is.na(anes$voted) & anes$voted == 1

# File 1: all respondents with complete data, including the turnout indicator.
selection <- na.omit(anes[, c(core.vars, "voted")])
summary(selection$personal)
# rescale "personal" as extreme values get stripped-out due to other missing variables.
selection$personal <- (selection$personal - min(selection$personal, na.rm = TRUE)) /
  diff(range(selection$personal, na.rm = TRUE))
# 9 is the placeholder vote choice; it becomes missing only after na.omit so
# that these rows stay in the file
selection$vote.rep[selection$vote.rep == 9] <- NA
selection <- selection[order(selection$voted), ]
table(selection$voted) #Observations 1-340 did not vote for two major. Observations 341-1421 did vote.
#selection[340:341,]
write.table(selection, "anes16selection.txt", row.names = FALSE)

# File 2: voted == 1 only, with the thermometers and the individual emotion items.
therm.vars <- c(
  core.vars, "demTherm", "repTherm",
  "dAngry", "dHope", "dAfraid", "dProud", "dDisgust",
  "rAngry", "rHope", "rAfraid", "rProud", "rDisgust"
)
thermData <- na.omit(anes[voted.rows, therm.vars])
summary(thermData$personal)
# rescale "personal" as extreme values get stripped-out due to other missing variables.
thermData$personal <- (thermData$personal - min(thermData$personal, na.rm = TRUE)) /
  diff(range(thermData$personal, na.rm = TRUE))
summary(thermData)
dim(thermData)
write.table(thermData, "thermData.txt", row.names = FALSE)

# File 3: voted == 1 only, core variables. This overwrites `anes`, so it must
# come after the two files above.
anes <- na.omit(anes[voted.rows, core.vars])
summary(anes$personal)
# rescale "personal" as extreme values get stripped-out due to other missing variables.
anes$personal <- (anes$personal - min(anes$personal, na.rm = TRUE)) /
  diff(range(anes$personal, na.rm = TRUE))
summary(anes)
dim(anes)
write.table(anes, "anes16additional.txt", row.names = FALSE)
